There is no seasonal or periodic pattern behind these complaints.
# Build the modelling data set: keep only the Equifax complaints and the
# columns used further down.
# `Company` is referenced as a bare column name so the predicate is evaluated
# against the rows flowing through the pipe rather than against the global
# `df_raw` object (which would silently misalign if a step were ever inserted
# before the filter).
df_ml <- df_raw %>%
  filter(Company == 'EQUIFAX, INC.') %>%
  select(
    Product, State,
    `Company response to consumer`,
    `Company public response`,
    `Consumer complaint narrative`,
    `Consumer consent provided?`
  )
# Recode the modelling columns as 0/1 indicators.
#   * Company response to consumer: 1 for "Closed with monetary relief",
#     0 for any other value; a missing value stays NA.
#   * Consumer consent provided?: 1 for "Consent provided", 0 for any other
#     value; a missing value stays NA.
#   * Consumer complaint narrative / Company public response: 0 when the value
#     is missing or is the literal string 'NA', 1 otherwise.
df_ml <- df_ml %>%
  mutate(
    `Company response to consumer` = ifelse(
      `Company response to consumer` == "Closed with monetary relief", 1, 0
    ),
    `Consumer consent provided?` = ifelse(
      `Consumer consent provided?` == "Consent provided", 1, 0
    ),
    `Consumer complaint narrative` = ifelse(
      is.na(`Consumer complaint narrative`) |
        `Consumer complaint narrative` == 'NA',
      0, 1
    ),
    `Company public response` = ifelse(
      is.na(`Company public response`) | `Company public response` == 'NA',
      0, 1
    )
  )
# Keep only the rows in which none of the three indicator columns is missing.
# The conditions passed to a single filter() call are combined with AND.
df_ml <- filter(
  df_ml,
  !is.na(`Company public response`),
  !is.na(`Consumer complaint narrative`),
  !is.na(`Consumer consent provided?`)
)
# Turn the 0/1 outcome into a two-level factor:
# code 1 -> "withMonetaryRelief" (first level), code 0 -> "noMonetaryRelief".
df_ml[["Company response to consumer"]] <- factor(
  df_ml[["Company response to consumer"]],
  levels = c(1, 0),
  labels = c("withMonetaryRelief", "noMonetaryRelief")
)
# Fix the RNG seed so that the train/test split is reproducible.
set.seed(12345)

# Row indices for the training set: createDataPartition() samples a share
# p = 0.8 of the rows based on the outcome column and, with list = FALSE,
# returns the indices as a matrix instead of a list.
in_train <- createDataPartition(
  y = df_ml[["Company response to consumer"]],
  p = 0.8,
  list = FALSE
)

# Rows selected above form the training set; all remaining rows are held out.
training <- df_ml[in_train, ]
testing <- df_ml[-in_train, ]
# Fit a random forest that predicts `Company response to consumer` from the
# product and the three 0/1 indicator columns.
#
# Changes compared with the earlier formula-based call:
#   * The outcome is no longer listed among the predictors. Including it
#     leaked the answer into the model and made every accuracy and importance
#     figure meaningless.
#   * No term refers to `training$...`. Those terms bound the fitted model to
#     the global `training` object, so it could not score any other data.
#   * The x/y interface is used because the column names are not syntactic R
#     names.
#     NOTE(review): randomForest's formula method appears to rebuild its model
#     frame through data.frame(), which renames such columns -- confirm before
#     switching back to a formula.
# NOTE(review): `State` is selected into the data but was not part of the
# original model; it is still left out here.
rf_predictors <- c(
  "Product",
  "Company public response",
  "Consumer complaint narrative",
  "Consumer consent provided?"
)

# Reproduce the former `na.action = na.omit`: rows with a missing outcome or a
# missing predictor are dropped before fitting.
rf_rows <- complete.cases(
  training[, c("Company response to consumer", rf_predictors)]
)

rf_x <- as.data.frame(training[rf_rows, rf_predictors])
# Categorical predictors must be factors. When predicting on new data,
# `Product` has to be converted to a factor with the same levels as here.
rf_x$Product <- as.factor(rf_x$Product)

rf <- randomForest(
  x = rf_x,
  y = training[["Company response to consumer"]][rf_rows],
  importance = TRUE
)